import urllib.request
import pandas as pd
from bs4 import BeautifulSoup

# Scrape the card titles and card bodies from the Baidu Baike front page and
# export them, paired by position, to an Excel workbook.

# Page counter left over from an earlier pagination experiment; nothing in the
# visible part of this script reads it.
djpage = 0

# Page to scrape.
http = "https://baike.baidu.com/"

# Download the raw page bytes. The timeout keeps the script from hanging
# forever on a stalled connection. Parsing is done after the ``with`` block so
# the HTTP connection is released as soon as the body has been read.
with urllib.request.urlopen(http, timeout=30) as response:
    html = response.read()

bf = BeautifulSoup(html, "lxml")

# Titles: the text of the first <a> inside every "card_cnt_tit" div.
card_cnt_tit = bf.find_all("div", class_="card_cnt_tit")
title = []
for tag in card_cnt_tit:
    link = tag.find("a")
    # find() returns None when the div contains no <a>; fall back to the div's
    # own text so one link-less card neither crashes the run nor shifts the
    # positional pairing with the content list.
    title.append((link if link is not None else tag).get_text())

# Bodies: the full text of every "card_cnt_cnt" div.
card_cnt = bf.find_all("div", class_="card_cnt_cnt")
content = [tag.get_text() for tag in card_cnt]

# Collect the scraped columns and show them for a quick visual check.
data = {'标题': title, '内容': content}
print(data)

# Build the frame from Series rather than raw lists: pandas aligns Series on
# their index and pads the shorter column with missing values, whereas lists
# of unequal length make the DataFrame constructor raise ValueError. Titles and
# bodies are paired purely by position on the page.
df = pd.DataFrame({column: pd.Series(values) for column, values in data.items()})
print(df)

# Save the table to an Excel file (without the index column).
df.to_excel('99.xlsx', index=False)
